Data Science Capstone project
Course 9 project
Imports
!pip install folium
import pandas as pd
import numpy as np
import requests
import folium
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors
Requirement already satisfied: folium in /home/denys/anaconda3/lib/python3.8/site-packages (0.12.0) Requirement already satisfied: numpy in /home/denys/anaconda3/lib/python3.8/site-packages (from folium) (1.19.2) Requirement already satisfied: branca>=0.3.0 in /home/denys/anaconda3/lib/python3.8/site-packages (from folium) (0.4.2) Requirement already satisfied: jinja2>=2.9 in /home/denys/anaconda3/lib/python3.8/site-packages (from folium) (2.11.2) Requirement already satisfied: requests in /home/denys/anaconda3/lib/python3.8/site-packages (from folium) (2.24.0) Requirement already satisfied: MarkupSafe>=0.23 in /home/denys/anaconda3/lib/python3.8/site-packages (from jinja2>=2.9->folium) (1.1.1) Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /home/denys/anaconda3/lib/python3.8/site-packages (from requests->folium) (1.25.11) Requirement already satisfied: certifi>=2017.4.17 in /home/denys/anaconda3/lib/python3.8/site-packages (from requests->folium) (2020.6.20) Requirement already satisfied: idna<3,>=2.5 in /home/denys/anaconda3/lib/python3.8/site-packages (from requests->folium) (2.10) Requirement already satisfied: chardet<4,>=3.0.2 in /home/denys/anaconda3/lib/python3.8/site-packages (from requests->folium) (3.0.4)
Load and wrangle postal code data from Wikipedia
# Load raw data from Wikipedia (first table found on the page)
df = pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')[0]

# Remove rows whose Borough is 'Not assigned'
is_not_na = df['Borough'] != 'Not assigned'
df1 = df[is_not_na]

# Where the Neighbourhood is 'Not assigned', fall back to the Borough name.
# A single boolean-mask assignment replaces the former row-by-row loop.
df2 = df1.copy()
unnamed = df2['Neighbourhood'] == 'Not assigned'
df2.loc[unnamed, 'Neighbourhood'] = df2.loc[unnamed, 'Borough']
# Note: Rows are already merged by Postal Code
View DF shape
df2.shape
(103, 3)
Load coordinates data
# Download the coordinates table; the preview below shows its columns
# (Postal Code, Latitude, Longitude).
coordinates_df = pd.read_csv('https://cocl.us/Geospatial_data')
coordinates_df.head()
| Postal Code | Latitude | Longitude | |
|---|---|---|---|
| 0 | M1B | 43.806686 | -79.194353 |
| 1 | M1C | 43.784535 | -79.160497 |
| 2 | M1E | 43.763573 | -79.188711 |
| 3 | M1G | 43.770992 | -79.216917 |
| 4 | M1H | 43.773136 | -79.239476 |
Add Coordinates
# Attach Latitude/Longitude to every postal code (default inner join on the
# shared 'Postal Code' column).
df3 = df2.merge(coordinates_df, on='Postal Code')
print(df3.shape)
df3.head()
(103, 5)
| Postal Code | Borough | Neighbourhood | Latitude | Longitude | |
|---|---|---|---|---|---|
| 0 | M3A | North York | Parkwoods | 43.753259 | -79.329656 |
| 1 | M4A | North York | Victoria Village | 43.725882 | -79.315572 |
| 2 | M5A | Downtown Toronto | Regent Park, Harbourfront | 43.654260 | -79.360636 |
| 3 | M6A | North York | Lawrence Manor, Lawrence Heights | 43.718518 | -79.464763 |
| 4 | M7A | Downtown Toronto | Queen's Park, Ontario Provincial Government | 43.662301 | -79.389494 |
import os

# Foursquare credentials are read from the environment when available so they
# do not have to be edited into the notebook; the values that were previously
# hard-coded here remain as fallbacks.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', 'SBPPJXXVK3DNXUMMAEXGNQ5DRZDY5DAPBQB5RS3VDEVBIYAD')  # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '{hidden}')  # your Foursquare Secret
VERSION = '20180605'  # Foursquare API version
LIMIT = 100  # A default Foursquare API limit value
def getNearbyVenues(names, latitudes, longitudes, radius=500, timeout=10):
    """Query the Foursquare ``venues/explore`` endpoint around each location.

    Parameters
    ----------
    names, latitudes, longitudes : iterable
        Consumed in lockstep; one API request is made per (name, lat, lng).
    radius : int, optional
        Forwarded as the ``radius`` query parameter of every request.
    timeout : float, optional
        Seconds to wait for each HTTP request before giving up.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.  When no
        venue is returned at all the frame is empty but keeps these columns.

    Raises
    ------
    requests.HTTPError
        If the API answers with a non-success HTTP status.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        # let requests build and escape the query string
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': LIMIT,
            },
            timeout=timeout,
        )
        # fail with a clear HTTP error instead of a KeyError on the payload
        response.raise_for_status()
        items = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            categories = venue['categories']
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # guard against a venue that carries no category entry
                categories[0]['name'] if categories else None))

    # passing the column names up front keeps an empty result well-formed
    return pd.DataFrame(rows, columns=columns)
# Fetch the venues around every neighbourhood listed in df3
venues = getNearbyVenues(
    df3['Neighbourhood'],
    df3['Latitude'],
    df3['Longitude'],
    radius=500,
)
print(venues.shape)
venues.head()
(2117, 7)
| Neighborhood | Neighborhood Latitude | Neighborhood Longitude | Venue | Venue Latitude | Venue Longitude | Venue Category | |
|---|---|---|---|---|---|---|---|
| 0 | Parkwoods | 43.753259 | -79.329656 | Brookbanks Park | 43.751976 | -79.332140 | Park |
| 1 | Parkwoods | 43.753259 | -79.329656 | Variety Store | 43.751974 | -79.333114 | Food & Drink Shop |
| 2 | Victoria Village | 43.725882 | -79.315572 | Victoria Village Arena | 43.723481 | -79.315635 | Hockey Arena |
| 3 | Victoria Village | 43.725882 | -79.315572 | Portugril | 43.725819 | -79.312785 | Portuguese Restaurant |
| 4 | Victoria Village | 43.725882 | -79.315572 | Tim Hortons | 43.725517 | -79.313103 | Coffee Shop |
# Centre the map on the df3 row labelled 0
start_point = [df3.loc[0, 'Latitude'], df3.loc[0, 'Longitude']]
map1 = folium.Map(location=start_point, zoom_start=11)

# Appearance shared by every venue marker
marker_style = dict(
    radius=5,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False,
)

# One circle per venue; its popup shows the neighbourhood it was fetched for
venue_points = zip(venues['Venue Latitude'], venues['Venue Longitude'], venues['Neighborhood'])
for lat, lng, neighborhood in venue_points:
    label = folium.Popup(neighborhood, parse_html=True)
    folium.CircleMarker([lat, lng], popup=label, **marker_style).add_to(map1)
map1
# Keep only the rows whose Borough name contains 'Toronto'
in_toronto = df3['Borough'].str.contains('Toronto')
df31 = df3[in_toronto]

# Fetch venues for that subset (default radius)
venues31 = getNearbyVenues(
    df31['Neighbourhood'],
    df31['Latitude'],
    df31['Longitude'],
)
# Centre the map on the first row of df31.  A positional lookup is used
# because df31 keeps df3's index labels, so a fixed label such as 2 is only
# present when that particular row happens to survive the borough filter.
map31 = folium.Map(
    location=[df31['Latitude'].iloc[0], df31['Longitude'].iloc[0]],
    zoom_start=11,
)

# add markers to map: one circle per venue, popup shows its neighbourhood
for lat, lng, neighborhood in zip(venues31['Venue Latitude'], venues31['Venue Longitude'], venues31['Neighborhood']):
    label = folium.Popup(neighborhood, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map31)
map31
# One indicator column per venue category, named after the category itself
toronto_onehot = pd.get_dummies(venues31[['Venue Category']], prefix="", prefix_sep="")

# Put the neighbourhood name in the first column; an indicator column that
# happens to be called 'Neighborhood' is discarded first so the name is free.
toronto_onehot = toronto_onehot.drop(columns='Neighborhood', errors='ignore')
toronto_onehot.insert(0, 'Neighborhood', venues31['Neighborhood'])
toronto_onehot.head()
| Neighborhood | Airport | Airport Food Court | Airport Lounge | Airport Service | Airport Terminal | American Restaurant | Antique Shop | Aquarium | Art Gallery | ... | Tibetan Restaurant | Toy / Game Store | Trail | Train Station | Vegetarian / Vegan Restaurant | Video Game Store | Vietnamese Restaurant | Wine Bar | Wine Shop | Yoga Studio | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Regent Park, Harbourfront | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 1 | Regent Park, Harbourfront | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2 | Regent Park, Harbourfront | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 3 | Regent Park, Harbourfront | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 4 | Regent Park, Harbourfront | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
5 rows × 234 columns
# Mean of every indicator column per neighbourhood, i.e. the share of that
# neighbourhood's venues falling in each category
toronto_grouped = toronto_onehot.groupby('Neighborhood', as_index=False).mean()
toronto_grouped
| Neighborhood | Airport | Airport Food Court | Airport Lounge | Airport Service | Airport Terminal | American Restaurant | Antique Shop | Aquarium | Art Gallery | ... | Tibetan Restaurant | Toy / Game Store | Trail | Train Station | Vegetarian / Vegan Restaurant | Video Game Store | Vietnamese Restaurant | Wine Bar | Wine Shop | Yoga Studio | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Berczy Park | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.017544 | ... | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.017544 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 1 | Brockton, Parkdale Village, Exhibition Place | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 2 | Business reply mail Processing Centre, South C... | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 3 | CN Tower, King and Spadina, Railway Lands, Har... | 0.066667 | 0.066667 | 0.133333 | 0.2 | 0.133333 | 0.000000 | 0.000000 | 0.00 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 4 | Central Bay Street | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.016393 | 0.000000 | 0.000000 | 0.016393 | 0.000000 | 0.016393 |
| 5 | Christie | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 6 | Church and Wellesley | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.012658 | 0.000000 | 0.00 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.000000 | 0.000000 | 0.012658 | 0.000000 | 0.000000 | 0.025316 |
| 7 | Commerce Court, Victoria Hotel | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.040000 | 0.000000 | 0.00 | 0.010000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.020000 | 0.000000 | 0.000000 | 0.010000 | 0.000000 | 0.000000 |
| 8 | Davisville | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.030303 | 0.000000 | 0.00 | 0.000000 | ... | 0.000000 | 0.030303 | 0.000000 | 0.00 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 9 | Davisville North | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 10 | Dufferin, Dovercourt Village | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 11 | First Canadian Place, Underground city | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.030000 | 0.000000 | 0.00 | 0.010000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.01 | 0.010000 | 0.000000 | 0.000000 | 0.010000 | 0.000000 | 0.000000 |
| 12 | Forest Hill North & West, Forest Hill Road Park | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.000000 | ... | 0.000000 | 0.000000 | 0.250000 | 0.00 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 13 | Garden District, Ryerson | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.010000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.000000 | 0.010000 | 0.010000 | 0.010000 | 0.000000 | 0.000000 |
| 14 | Harbourfront East, Union Station, Toronto Islands | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.05 | 0.010000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.01 | 0.010000 | 0.000000 | 0.000000 | 0.010000 | 0.000000 | 0.000000 |
| 15 | High Park, The Junction South | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.041667 | 0.00 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 16 | India Bazaar, The Beaches West | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 17 | Kensington Market, Chinatown, Grange Park | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.046875 | 0.000000 | 0.046875 | 0.015625 | 0.000000 | 0.000000 |
| 18 | Lawrence Park | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 19 | Little Portugal, Trinity | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.023256 | ... | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.046512 | 0.000000 | 0.046512 | 0.023256 | 0.000000 | 0.023256 |
| 20 | Moore Park, Summerhill East | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 21 | North Toronto West, Lawrence Park | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.047619 |
| 22 | Parkdale, Roncesvalles | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 23 | Queen's Park, Ontario Provincial Government | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.028571 |
| 24 | Regent Park, Harbourfront | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.021739 | 0.00 | 0.021739 | ... | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.021739 | 0.021739 |
| 25 | Richmond, Adelaide, King | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.020619 | 0.000000 | 0.00 | 0.010309 | ... | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.010309 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 26 | Rosedale | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.000000 | ... | 0.000000 | 0.000000 | 0.250000 | 0.00 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 27 | Roselawn | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 28 | Runnymede, Swansea | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.030303 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.030303 |
| 29 | St. James Town | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.037500 | 0.000000 | 0.00 | 0.012500 | ... | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.012500 | 0.000000 | 0.000000 | 0.012500 | 0.000000 | 0.000000 |
| 30 | St. James Town, Cabbagetown | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 31 | Stn A PO Boxes | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.010309 | 0.010309 | 0.00 | 0.020619 | ... | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.010309 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.010309 |
| 32 | Studio District | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.054054 | 0.000000 | 0.00 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.027027 |
| 33 | Summerhill West, Rathnelly, South Hill, Forest... | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.066667 | 0.000000 | 0.00 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.000000 | 0.000000 | 0.066667 | 0.000000 | 0.000000 | 0.000000 |
| 34 | The Annex, North Midtown, Yorkville | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 35 | The Beaches | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.000000 | ... | 0.000000 | 0.000000 | 0.250000 | 0.00 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 36 | The Danforth West, Riverdale | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.023256 | 0.000000 | 0.00 | 0.000000 | ... | 0.023256 | 0.000000 | 0.023256 | 0.00 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.023256 |
| 37 | Toronto Dominion Centre, Design Exchange | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.030000 | 0.000000 | 0.00 | 0.010000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.01 | 0.010000 | 0.000000 | 0.000000 | 0.010000 | 0.000000 | 0.000000 |
| 38 | University of Toronto, Harbord | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.00 | 0.000000 | 0.030303 | 0.000000 | 0.000000 | 0.000000 | 0.030303 |
39 rows × 234 columns
#
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of ``row``.

    The first entry of ``row`` is skipped; the remaining entries are ordered
    by value, largest first, and the index labels of the leading
    ``num_top_venues`` entries are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index[:num_top_venues].values
#
# Let's take only 7 as the density seems to be lower than in NY where value of 10 was used
#
num_top_venues = 7
indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues:
# '1st', '2nd', '3rd' take their suffix from `indicators`, every later rank
# gets 'th' (explicit bounds check instead of catching an exception)
columns = ['Neighborhood']
for ind in range(num_top_venues):
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind + 1, suffix))

# create a new dataframe with one row per neighbourhood of toronto_grouped
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

# fill the rank columns of each row with that neighbourhood's top categories
for ind in range(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)
neighborhoods_venues_sorted.head()
| Neighborhood | 1st Most Common Venue | 2nd Most Common Venue | 3rd Most Common Venue | 4th Most Common Venue | 5th Most Common Venue | 6th Most Common Venue | 7th Most Common Venue | |
|---|---|---|---|---|---|---|---|---|
| 0 | Berczy Park | Coffee Shop | Cocktail Bar | Beer Bar | Cheese Shop | Bakery | Restaurant | Farmers Market |
| 1 | Brockton, Parkdale Village, Exhibition Place | Café | Breakfast Spot | Nightclub | Coffee Shop | Pet Store | Climbing Gym | Burrito Place |
| 2 | Business reply mail Processing Centre, South C... | Light Rail Station | Auto Workshop | Pizza Place | Comic Shop | Restaurant | Burrito Place | Brewery |
| 3 | CN Tower, King and Spadina, Railway Lands, Har... | Airport Service | Airport Lounge | Airport Terminal | Airport | Boutique | Sculpture Garden | Plane |
| 4 | Central Bay Street | Coffee Shop | Sandwich Place | Café | Italian Restaurant | Burger Joint | Bubble Tea Shop | Salad Place |
# set number of clusters
kclusters = 5

# feature matrix: every column except the neighbourhood name
# (`columns=` keyword: the positional axis argument is not accepted by
# current pandas releases)
toronto_grouped_clustering = toronto_grouped.drop(columns='Neighborhood')

# run k-means clustering (fixed random_state for repeatable labels)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for the first rows of the dataframe
kmeans.labels_[0:20]
array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1],
dtype=int32)
# Prepend each neighbourhood's k-means label to its top-venue table
neighborhoods_venues_sorted_w_clusters = neighborhoods_venues_sorted.copy()
neighborhoods_venues_sorted_w_clusters.insert(0, 'Cluster Labels', kmeans.labels_)

# Left-join the labelled top-venue table onto the Toronto rows, matching on
# the neighbourhood name (column renamed to the spelling used by that table)
clusters_by_name = neighborhoods_venues_sorted_w_clusters.set_index('Neighborhood')
toronto_merged = df31.rename(columns={"Neighbourhood": "Neighborhood"})
toronto_merged = toronto_merged.join(clusters_by_name, on='Neighborhood')
# create map, centred on the first row of df31 (positional lookup: df31 keeps
# df3's index labels, so a fixed label is not guaranteed to exist)
map_clusters = folium.Map(
    location=[df31['Latitude'].iloc[0], df31['Longitude'].iloc[0]],
    zoom_start=11,
)

# set color scheme for the clusters: one evenly spaced rainbow colour each
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map; rows that received no cluster label from the join
# are skipped because they have no colour to be drawn with
clustered = toronto_merged.dropna(subset=['Cluster Labels'])
for lat, lon, poi, cluster in zip(clustered['Latitude'], clustered['Longitude'], clustered['Neighborhood'], clustered['Cluster Labels']):
    # the label column turns float when the join introduced missing values
    cluster = int(cluster)
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        # labels run 0..kclusters-1, so they index the colour list directly
        color=rainbow[cluster],
        fill=True,
        fill_color=rainbow[cluster],
        fill_opacity=0.7).add_to(map_clusters)
map_clusters
toronto_merged.head(50)
| Postal Code | Borough | Neighborhood | Latitude | Longitude | Cluster Labels | 1st Most Common Venue | 2nd Most Common Venue | 3rd Most Common Venue | 4th Most Common Venue | 5th Most Common Venue | 6th Most Common Venue | 7th Most Common Venue | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | M5A | Downtown Toronto | Regent Park, Harbourfront | 43.654260 | -79.360636 | 1 | Coffee Shop | Café | Park | Bakery | Breakfast Spot | Pub | Theater |
| 4 | M7A | Downtown Toronto | Queen's Park, Ontario Provincial Government | 43.662301 | -79.389494 | 1 | Coffee Shop | Sushi Restaurant | Bank | Bar | Portuguese Restaurant | Beer Bar | Diner |
| 9 | M5B | Downtown Toronto | Garden District, Ryerson | 43.657162 | -79.378937 | 1 | Coffee Shop | Clothing Store | Japanese Restaurant | Hotel | Bubble Tea Shop | Middle Eastern Restaurant | Cosmetics Shop |
| 15 | M5C | Downtown Toronto | St. James Town | 43.651494 | -79.375418 | 1 | Coffee Shop | Café | Gastropub | Cocktail Bar | American Restaurant | Italian Restaurant | Hotel |
| 19 | M4E | East Toronto | The Beaches | 43.676357 | -79.293031 | 1 | Health Food Store | Pub | Trail | Yoga Studio | Dog Run | Dessert Shop | Diner |
| 20 | M5E | Downtown Toronto | Berczy Park | 43.644771 | -79.373306 | 1 | Coffee Shop | Cocktail Bar | Beer Bar | Cheese Shop | Bakery | Restaurant | Farmers Market |
| 24 | M5G | Downtown Toronto | Central Bay Street | 43.657952 | -79.387383 | 1 | Coffee Shop | Sandwich Place | Café | Italian Restaurant | Burger Joint | Bubble Tea Shop | Salad Place |
| 25 | M6G | Downtown Toronto | Christie | 43.669542 | -79.422564 | 1 | Grocery Store | Café | Park | Nightclub | Coffee Shop | Italian Restaurant | Baby Store |
| 30 | M5H | Downtown Toronto | Richmond, Adelaide, King | 43.650571 | -79.384568 | 1 | Coffee Shop | Café | Restaurant | Clothing Store | Deli / Bodega | Hotel | Gym |
| 31 | M6H | West Toronto | Dufferin, Dovercourt Village | 43.669005 | -79.442259 | 1 | Pharmacy | Bakery | Middle Eastern Restaurant | Grocery Store | Park | Bar | Supermarket |
| 36 | M5J | Downtown Toronto | Harbourfront East, Union Station, Toronto Islands | 43.640816 | -79.381752 | 1 | Coffee Shop | Aquarium | Café | Hotel | Brewery | Scenic Lookout | Italian Restaurant |
| 37 | M6J | West Toronto | Little Portugal, Trinity | 43.647927 | -79.419750 | 1 | Bar | Coffee Shop | Asian Restaurant | Vietnamese Restaurant | Restaurant | Vegetarian / Vegan Restaurant | Café |
| 41 | M4K | East Toronto | The Danforth West, Riverdale | 43.679557 | -79.352188 | 1 | Greek Restaurant | Coffee Shop | Italian Restaurant | Bookstore | Ice Cream Shop | Furniture / Home Store | Liquor Store |
| 42 | M5K | Downtown Toronto | Toronto Dominion Centre, Design Exchange | 43.647177 | -79.381576 | 1 | Coffee Shop | Hotel | Café | Salad Place | American Restaurant | Japanese Restaurant | Italian Restaurant |
| 43 | M6K | West Toronto | Brockton, Parkdale Village, Exhibition Place | 43.636847 | -79.428191 | 1 | Café | Breakfast Spot | Nightclub | Coffee Shop | Pet Store | Climbing Gym | Burrito Place |
| 47 | M4L | East Toronto | India Bazaar, The Beaches West | 43.668999 | -79.315572 | 1 | Fast Food Restaurant | Sushi Restaurant | Pub | Liquor Store | Sandwich Place | Burrito Place | Board Shop |
| 48 | M5L | Downtown Toronto | Commerce Court, Victoria Hotel | 43.648198 | -79.379817 | 1 | Coffee Shop | Restaurant | Hotel | Café | Italian Restaurant | Gym | American Restaurant |
| 54 | M4M | East Toronto | Studio District | 43.659526 | -79.340923 | 1 | Coffee Shop | American Restaurant | Bakery | Brewery | Café | Gastropub | Yoga Studio |
| 61 | M4N | Central Toronto | Lawrence Park | 43.728020 | -79.388790 | 1 | Park | Bus Line | Business Service | Swim School | Yoga Studio | Dessert Shop | Escape Room |
| 62 | M5N | Central Toronto | Roselawn | 43.711695 | -79.416936 | 0 | Ice Cream Shop | Home Service | Garden | Health & Beauty Service | Dessert Shop | Diner | Discount Store |
| 67 | M4P | Central Toronto | Davisville North | 43.712751 | -79.390197 | 1 | Gym / Fitness Center | Hotel | Pizza Place | Department Store | Sandwich Place | Breakfast Spot | Food & Drink Shop |
| 68 | M5P | Central Toronto | Forest Hill North & West, Forest Hill Road Park | 43.696948 | -79.411307 | 3 | Jewelry Store | Trail | Mexican Restaurant | Sushi Restaurant | Yoga Studio | Department Store | Escape Room |
| 69 | M6P | West Toronto | High Park, The Junction South | 43.661608 | -79.464763 | 1 | Thai Restaurant | Mexican Restaurant | Café | Discount Store | Italian Restaurant | Bar | Fried Chicken Joint |
| 73 | M4R | Central Toronto | North Toronto West, Lawrence Park | 43.715383 | -79.405678 | 1 | Clothing Store | Coffee Shop | Yoga Studio | Sporting Goods Shop | Grocery Store | Ice Cream Shop | Fast Food Restaurant |
| 74 | M5R | Central Toronto | The Annex, North Midtown, Yorkville | 43.672710 | -79.405678 | 1 | Café | Sandwich Place | Coffee Shop | Liquor Store | Indian Restaurant | Pub | BBQ Joint |
| 75 | M6R | West Toronto | Parkdale, Roncesvalles | 43.648960 | -79.456325 | 1 | Breakfast Spot | Gift Shop | Restaurant | Cuban Restaurant | Eastern European Restaurant | Dog Run | Italian Restaurant |
| 79 | M4S | Central Toronto | Davisville | 43.704324 | -79.388790 | 1 | Sandwich Place | Dessert Shop | Café | Italian Restaurant | Sushi Restaurant | Gym | Coffee Shop |
| 80 | M5S | Downtown Toronto | University of Toronto, Harbord | 43.662696 | -79.400049 | 1 | Café | Bookstore | Bar | Italian Restaurant | Japanese Restaurant | Bakery | Dessert Shop |
| 81 | M6S | West Toronto | Runnymede, Swansea | 43.651571 | -79.484450 | 1 | Coffee Shop | Café | Sushi Restaurant | Italian Restaurant | Pub | Latin American Restaurant | Bar |
| 83 | M4T | Central Toronto | Moore Park, Summerhill East | 43.689574 | -79.383160 | 2 | Summer Camp | Yoga Studio | Farmers Market | Event Space | Ethiopian Restaurant | Escape Room | Electronics Store |
| 84 | M5T | Downtown Toronto | Kensington Market, Chinatown, Grange Park | 43.653206 | -79.400049 | 1 | Café | Coffee Shop | Vietnamese Restaurant | Vegetarian / Vegan Restaurant | Mexican Restaurant | Caribbean Restaurant | Grocery Store |
| 86 | M4V | Central Toronto | Summerhill West, Rathnelly, South Hill, Forest... | 43.686412 | -79.400049 | 1 | Coffee Shop | Bagel Shop | Pizza Place | Supermarket | Sushi Restaurant | Light Rail Station | Fried Chicken Joint |
| 87 | M5V | Downtown Toronto | CN Tower, King and Spadina, Railway Lands, Har... | 43.628947 | -79.394420 | 1 | Airport Service | Airport Lounge | Airport Terminal | Airport | Boutique | Sculpture Garden | Plane |
| 91 | M4W | Downtown Toronto | Rosedale | 43.679563 | -79.377529 | 4 | Park | Playground | Trail | Yoga Studio | Deli / Bodega | Escape Room | Electronics Store |
| 92 | M5W | Downtown Toronto | Stn A PO Boxes | 43.646435 | -79.374846 | 1 | Coffee Shop | Cocktail Bar | Seafood Restaurant | Italian Restaurant | Hotel | Japanese Restaurant | Restaurant |
| 96 | M4X | Downtown Toronto | St. James Town, Cabbagetown | 43.667967 | -79.367675 | 1 | Restaurant | Pizza Place | Coffee Shop | Bakery | Pub | Pet Store | Italian Restaurant |
| 97 | M5X | Downtown Toronto | First Canadian Place, Underground city | 43.648429 | -79.382280 | 1 | Coffee Shop | Café | Hotel | Restaurant | Gym | Japanese Restaurant | Deli / Bodega |
| 99 | M4Y | Downtown Toronto | Church and Wellesley | 43.665860 | -79.383160 | 1 | Coffee Shop | Sushi Restaurant | Japanese Restaurant | Restaurant | Gay Bar | Fast Food Restaurant | Yoga Studio |
| 100 | M7Y | East Toronto | Business reply mail Processing Centre, South C... | 43.662744 | -79.321558 | 1 | Light Rail Station | Auto Workshop | Pizza Place | Comic Shop | Restaurant | Burrito Place | Brewery |
Neighbourhoods are pretty similar in the central part of Toronto. Most of them have a wide variety of coffee shops, restaurants and other entertainment-related places. Business ventures are also highly popular, but are not widely represented in the available data set.